{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text Classification With Machine Learning In Python\n",
    "+ News Category Classifier\n",
    "+ - Aim: To Classify and Predict news feeds into their appropiate Categories\n",
    "+ Credit to Roshan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "### EDA packages and Web\n",
    "import pandas as pd\n",
    "import lxml\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "url_list = [\"https://www.reuters.com/news/health\",\n",
    "            \"https://www.reuters.com/politics\",\n",
    "            \"https://www.reuters.com/finance\",\n",
    "            \"https://www.reuters.com/news/sports\",\n",
    "            \"https://www.reuters.com/news/technology\"]\n",
    "            \n",
    "feeds_list = [\n",
    "            \"http://feeds.reuters.com/reuters/businessNews\",\n",
    "            \"http://feeds.reuters.com/reuters/technologyNews\",\n",
    "            \"http://feeds.reuters.com/reuters/sportsNews\",\n",
    "            \"http://feeds.reuters.com/reuters/healthNews\",\n",
    "            \"http://feeds.reuters.com/reuters/politicsNews\",]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using LXML \n",
    "from lxml import etree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scraping and Parsing Data From Feeds_list\n",
    "datafeeds = []\n",
    "for feed in feeds_list:\n",
    "    response = requests.get(feed)\n",
    "    xml_page = response.text\n",
    "    parser = etree.XMLParser(recover=True, encoding='utf-8')\n",
    "    datafeeds.append(etree.fromstring(xml_page.encode(\"utf-8\"), parser=parser))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function for Building Node\n",
    "def print_tag(node):\n",
    "    print(\"<%s %s>%s\" % (node.tag, \" \".join([\"%s=%s\" % (k,v)for k,v in node.attrib.iteritems()]), node.text))\n",
    "    for item in node[:25]:\n",
    "        print(\"  <%s %s>%s</%s>\" % (item.tag, \" \".join([\"%s=%s\" % (k,v)for k,v in item.attrib.iteritems()]), item.text, item.tag))\n",
    "    print(\"</%s>\" % node.tag)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<rss version=2.0>\n",
      "\n",
      "  <channel >\n",
      "</channel>\n",
      "</rss>\n"
     ]
    }
   ],
   "source": [
    "# What we want to select\n",
    "general_node = datafeeds[0]\n",
    "print_tag(general_node)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<channel >\n",
      "\n",
      "  <title >Reuters: Business News</title>\n",
      "  <link >http://www.reuters.com</link>\n",
      "  <description >Reuters.com is your source for breaking news, business, financial and investing news, including personal finance and stocks.  Reuters is the leading global provider of news, financial information and technology solutions to the world's media, financial institutions, businesses and individuals.</description>\n",
      "  <image >\n",
      "\t</image>\n",
      "  <language >en-us</language>\n",
      "  <lastBuildDate >Sat, 14 Apr 2018 10:04:42 -0400</lastBuildDate>\n",
      "  <copyright >All rights reserved. Users may download and print extracts of content from this website for their own personal and non-commercial use only. Republication or redistribution of Reuters content, including by framing or similar means, is expressly prohibited without the prior written consent of Reuters. Reuters and the Reuters sphere logo are registered trademarks or trademarks of the Reuters group of companies around the world. © Reuters 2018</copyright>\n",
      "  <{http://www.w3.org/2005/Atom}link rel=self type=application/rss+xml href=http://feeds.reuters.com/reuters/businessNews>None</{http://www.w3.org/2005/Atom}link>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}info uri=reuters/businessnews>None</{http://rssnamespace.org/feedburner/ext/1.0}info>\n",
      "  <{http://www.w3.org/2005/Atom}link rel=hub href=http://pubsubhubbub.appspot.com/>None</{http://www.w3.org/2005/Atom}link>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=https://add.my.yahoo.com/rss?url=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif>Subscribe with My Yahoo!</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=http://www.newsgator.com/images/ngsub1.gif>Subscribe with NewsGator</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif>Subscribe with My AOL</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://www.bloglines.com/sub/http://feeds.reuters.com/reuters/businessNews src=http://www.bloglines.com/images/sub_modern11.gif>Subscribe with Bloglines</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=//www.netvibes.com/img/add2netvibes.gif>Subscribe with Netvibes</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://fusion.google.com/add?feedurl=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=http://buttons.googlesyndication.com/fusion/add.gif>Subscribe with Google</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}feedFlare href=http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Ffeeds.reuters.com%2Freuters%2FbusinessNews src=http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&fileName=ATP_blu_91x17.gif>Subscribe with Pageflakes</{http://rssnamespace.org/feedburner/ext/1.0}feedFlare>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "  <item >\n",
      "\t\t</item>\n",
      "</channel>\n"
     ]
    }
   ],
   "source": [
    "# Selecting Node\n",
    "general_node = general_node[0]\n",
    "print_tag(general_node)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<item >\n",
      "\t\t\n",
      "  <title >Wall Street eyes earnings stabilizer after FAANG stocks wobble</title>\n",
      "  <description >(Reuters) - Wall Street is hoping that first-quarter earnings growth and corporate forecasts are strong enough to bring the FAANG group of stocks back into favor and take the spotlight off worries that caused the recent sell-off in the high-flying group.<div class=\"feedflare\">\n",
      "<a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:yIl2AUoC8zA\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?d=yIl2AUoC8zA\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:F7zBnMyn0Lo\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=WBFIN23VpFE:YCyjiaDuEFA:F7zBnMyn0Lo\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:V_sGLiPBpWU\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=WBFIN23VpFE:YCyjiaDuEFA:V_sGLiPBpWU\" border=\"0\"></img></a>\n",
      "</div><img src=\"http://feeds.feedburner.com/~r/reuters/businessNews/~4/WBFIN23VpFE\" height=\"1\" width=\"1\" alt=\"\"/></description>\n",
      "  <link >http://feeds.reuters.com/~r/reuters/businessNews/~3/WBFIN23VpFE/wall-street-eyes-earnings-stabilizer-after-faang-stocks-wobble-idUSKBN1HK2O5</link>\n",
      "  <guid isPermaLink=false>http://www.reuters.com/article/us-usa-stocks-weekahead/wall-street-eyes-earnings-stabilizer-after-faang-stocks-wobble-idUSKBN1HK2O5?feedType=RSS&feedName=businessNews</guid>\n",
      "  <category >businessNews</category>\n",
      "  <pubDate >Fri, 13 Apr 2018 21:08:30 -0400</pubDate>\n",
      "  <{http://rssnamespace.org/feedburner/ext/1.0}origLink >http://www.reuters.com/article/us-usa-stocks-weekahead/wall-street-eyes-earnings-stabilizer-after-faang-stocks-wobble-idUSKBN1HK2O5?feedType=RSS&feedName=businessNews</{http://rssnamespace.org/feedburner/ext/1.0}origLink>\n",
      "</item>\n"
     ]
    }
   ],
   "source": [
    "# Specific Selection of Item\n",
    "general_node = general_node.xpath(\"item\")[0]\n",
    "print_tag(general_node)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Grouping them  into List and Array\n",
    "title_list = []\n",
    "description_list = []\n",
    "category_list = []\n",
    "\n",
    "for xml_doc in datafeeds:\n",
    "    articles = xml_doc.xpath(\"//item\")\n",
    "    for article in articles: #0,1,4 instead of 0,2,3\n",
    "        title_list.append(article[0].text)\n",
    "        description_list.append(article[1].text)\n",
    "        category_list.append(article[4].text)\n",
    "        \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Description</th>\n",
       "      <th>Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Wall Street eyes earnings stabilizer after FAA...</td>\n",
       "      <td>(Reuters) - Wall Street is hoping that first-q...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wells Fargo faces $1 billion fine from loan ab...</td>\n",
       "      <td>(Reuters) - Two U.S. regulators have proposed ...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GE books $4.2 billion charge, restates earning...</td>\n",
       "      <td>NEW YORK (Reuters) - General Electric Co  said...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>U.S. bank executives see delayed boost from ta...</td>\n",
       "      <td>NEW YORK (Reuters) - Banks have not reaped the...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>U.S. lowers NAFTA key auto content demand: aut...</td>\n",
       "      <td>MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>German interior minister rejects union's six p...</td>\n",
       "      <td>BERLIN (Reuters) - German Interior Minister Ho...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Bratz maker's CEO bids $890 million for Toys '...</td>\n",
       "      <td>(Reuters) - Bratz doll maker MGA Entertainment...</td>\n",
       "      <td>businessNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>'Gold rush' for Wi-Fi on board planes spurs in...</td>\n",
       "      <td>HAMBURG (Reuters) - Satellite technology to pr...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Russia to ban Telegram messenger over encrypti...</td>\n",
       "      <td>MOSCOW (Reuters) - A Russian court on Friday o...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Trade war or not, China is closing the gap on ...</td>\n",
       "      <td>HONG KONG (Reuters) - China's rising investmen...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Singapore watchdog sets interim measures for U...</td>\n",
       "      <td>SINGAPORE (Reuters) - Singapore's competition ...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Singapore to test facial recognition on lamppo...</td>\n",
       "      <td>SINGAPORE (Reuters) - In the not too distant f...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Microsoft auditing partner KPMG's anti-piracy ...</td>\n",
       "      <td>NEW DELHI (Reuters) - Microsoft Corp is invest...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Lawmakers question FBI over San Bernardino sui...</td>\n",
       "      <td>(Reuters) - A bipartisan group of lawmakers in...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Weibo to ban gay, violent content from platform</td>\n",
       "      <td>SHANGHAI (Reuters) - China's Sina Weibo will r...</td>\n",
       "      <td>technologyNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Blake heads into hiding from Bolt as Jamaica c...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Usain Bolt j...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Vettel snatches pole position in Ferrari one-two</td>\n",
       "      <td>SHANGHAI (Reuters) - Formula One championship ...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Olympics: Phelps says U.S. swim team can thriv...</td>\n",
       "      <td>(Reuters) - Michael Phelps will not be tempted...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Horse racing: Australian mare Winx eases to re...</td>\n",
       "      <td>(Reuters) - Australian champion mare Winx crui...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Highlights on day 10 of Commonwealth Games</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Highlights o...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Athletics: Kirui and Rupp renew Boston Maratho...</td>\n",
       "      <td>(Reuters) - Kenyans Geoffrey Kirui and Edna Ki...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Golf: DeChambeau surges to halfway lead at Hil...</td>\n",
       "      <td>(Reuters) - Bryson DeChambeau birdied four of ...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Australia coach hails fighting spirit after se...</td>\n",
       "      <td>HONG KONG (Reuters) - Australia coach Alen Sta...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>New Zealand's Nyika, women's hockey team crash...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Heavyweight ...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Semenya believes she could go the distance aft...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Caster Semen...</td>\n",
       "      <td>sportsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>EpiPen shortages seen in Canada, UK but U.S. s...</td>\n",
       "      <td>NEW YORK (Reuters) - Mylan N.V.'s  emergency a...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Mylan seeks deal for German Merck's consumer p...</td>\n",
       "      <td>FRANKFURT/NEW YORK (Reuters) - Generic drug ma...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Enforcement of Kentucky abortion law suspended...</td>\n",
       "      <td>(Reuters) - Kentucky state officials have agre...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Insect farms gear up to feed soaring global pr...</td>\n",
       "      <td>LANGLEY, British Columbia (Reuters) - Layers o...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>U.S. appeals court strikes down Maryland drug ...</td>\n",
       "      <td>(Reuters) - A federal appeals court on Friday ...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>CVS Health appoints Marc-David Munk as CMO of ...</td>\n",
       "      <td>(Reuters) - CVS Health on Friday named Marc-Da...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>ManorCare wins court approval to exit bankrupt...</td>\n",
       "      <td>(Reuters) - No. 2 U.S. nursing home chain HCR ...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Religious faith linked to suicidal behavior in...</td>\n",
       "      <td>(Reuters Health) - Although religiosity is gen...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Patients more satisfied when doctors treat few...</td>\n",
       "      <td>(Reuters Health) - Doctors who see fewer patie...</td>\n",
       "      <td>healthNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Trump's personal lawyer attacked by U.S. prose...</td>\n",
       "      <td>NEW YORK (Reuters) - A U.S. prosecutor on Frid...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Trump, called an unethical liar in book, blast...</td>\n",
       "      <td>WASHINGTON/NEW YORK (Reuters) - President Dona...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>With Russia on his mind, Trump looks for tough...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Republican-led House panel orders interviews w...</td>\n",
       "      <td>WASHINGTON (Reuters) - A Republican-led House ...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>Ex-FBI deputy director faulted by Justice Depa...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. Department of ...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>Trump lawyer arranged $1.6 million payoff to P...</td>\n",
       "      <td>WASHINGTON (Reuters) - U.S. President Donald T...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>Special counsel has evidence Michael Cohen tra...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. special counse...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>House passes bill to streamline 'Volcker Rule'</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. House of Repre...</td>\n",
       "      <td>politicsNews</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                Title  \\\n",
       "0   Wall Street eyes earnings stabilizer after FAA...   \n",
       "1   Musk insists Tesla does not need more capital,...   \n",
       "2   Trump says U.S. will only rejoin Pacific trade...   \n",
       "3   Wells Fargo faces $1 billion fine from loan ab...   \n",
       "4   Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "5   GE books $4.2 billion charge, restates earning...   \n",
       "6   U.S. bank executives see delayed boost from ta...   \n",
       "7   U.S. lowers NAFTA key auto content demand: aut...   \n",
       "8   German interior minister rejects union's six p...   \n",
       "9   Bratz maker's CEO bids $890 million for Toys '...   \n",
       "10  Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "11  Musk insists Tesla does not need more capital,...   \n",
       "12  'Gold rush' for Wi-Fi on board planes spurs in...   \n",
       "13  Russia to ban Telegram messenger over encrypti...   \n",
       "14  Trade war or not, China is closing the gap on ...   \n",
       "15  Singapore watchdog sets interim measures for U...   \n",
       "16  Singapore to test facial recognition on lamppo...   \n",
       "17  Microsoft auditing partner KPMG's anti-piracy ...   \n",
       "18  Lawmakers question FBI over San Bernardino sui...   \n",
       "19    Weibo to ban gay, violent content from platform   \n",
       "20  Blake heads into hiding from Bolt as Jamaica c...   \n",
       "21   Vettel snatches pole position in Ferrari one-two   \n",
       "22  Olympics: Phelps says U.S. swim team can thriv...   \n",
       "23  Horse racing: Australian mare Winx eases to re...   \n",
       "24         Highlights on day 10 of Commonwealth Games   \n",
       "25  Athletics: Kirui and Rupp renew Boston Maratho...   \n",
       "26  Golf: DeChambeau surges to halfway lead at Hil...   \n",
       "27  Australia coach hails fighting spirit after se...   \n",
       "28  New Zealand's Nyika, women's hockey team crash...   \n",
       "29  Semenya believes she could go the distance aft...   \n",
       "30  EpiPen shortages seen in Canada, UK but U.S. s...   \n",
       "31  Mylan seeks deal for German Merck's consumer p...   \n",
       "32  Trump to lift legal threat to states that perm...   \n",
       "33  Enforcement of Kentucky abortion law suspended...   \n",
       "34  Insect farms gear up to feed soaring global pr...   \n",
       "35  U.S. appeals court strikes down Maryland drug ...   \n",
       "36  CVS Health appoints Marc-David Munk as CMO of ...   \n",
       "37  ManorCare wins court approval to exit bankrupt...   \n",
       "38  Religious faith linked to suicidal behavior in...   \n",
       "39  Patients more satisfied when doctors treat few...   \n",
       "40  Trump's personal lawyer attacked by U.S. prose...   \n",
       "41  Trump, called an unethical liar in book, blast...   \n",
       "42  Trump says U.S. will only rejoin Pacific trade...   \n",
       "43  With Russia on his mind, Trump looks for tough...   \n",
       "44  Republican-led House panel orders interviews w...   \n",
       "45  Trump to lift legal threat to states that perm...   \n",
       "46  Ex-FBI deputy director faulted by Justice Depa...   \n",
       "47  Trump lawyer arranged $1.6 million payoff to P...   \n",
       "48  Special counsel has evidence Michael Cohen tra...   \n",
       "49     House passes bill to streamline 'Volcker Rule'   \n",
       "\n",
       "                                          Description        Category  \n",
       "0   (Reuters) - Wall Street is hoping that first-q...    businessNews  \n",
       "1   (Reuters) - Tesla Inc  will be profitable in t...    businessNews  \n",
       "2   WASHINGTON/TOKYO (Reuters) - U.S. President Do...    businessNews  \n",
       "3   (Reuters) - Two U.S. regulators have proposed ...    businessNews  \n",
       "4   (Reuters) - Facebook Inc  Chief Executive Mark...    businessNews  \n",
       "5   NEW YORK (Reuters) - General Electric Co  said...    businessNews  \n",
       "6   NEW YORK (Reuters) - Banks have not reaped the...    businessNews  \n",
       "7   MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...    businessNews  \n",
       "8   BERLIN (Reuters) - German Interior Minister Ho...    businessNews  \n",
       "9   (Reuters) - Bratz doll maker MGA Entertainment...    businessNews  \n",
       "10  (Reuters) - Facebook Inc  Chief Executive Mark...  technologyNews  \n",
       "11  (Reuters) - Tesla Inc  will be profitable in t...  technologyNews  \n",
       "12  HAMBURG (Reuters) - Satellite technology to pr...  technologyNews  \n",
       "13  MOSCOW (Reuters) - A Russian court on Friday o...  technologyNews  \n",
       "14  HONG KONG (Reuters) - China's rising investmen...  technologyNews  \n",
       "15  SINGAPORE (Reuters) - Singapore's competition ...  technologyNews  \n",
       "16  SINGAPORE (Reuters) - In the not too distant f...  technologyNews  \n",
       "17  NEW DELHI (Reuters) - Microsoft Corp is invest...  technologyNews  \n",
       "18  (Reuters) - A bipartisan group of lawmakers in...  technologyNews  \n",
       "19  SHANGHAI (Reuters) - China's Sina Weibo will r...  technologyNews  \n",
       "20  GOLD COAST, Australia (Reuters) - Usain Bolt j...      sportsNews  \n",
       "21  SHANGHAI (Reuters) - Formula One championship ...      sportsNews  \n",
       "22  (Reuters) - Michael Phelps will not be tempted...      sportsNews  \n",
       "23  (Reuters) - Australian champion mare Winx crui...      sportsNews  \n",
       "24  GOLD COAST, Australia (Reuters) - Highlights o...      sportsNews  \n",
       "25  (Reuters) - Kenyans Geoffrey Kirui and Edna Ki...      sportsNews  \n",
       "26  (Reuters) - Bryson DeChambeau birdied four of ...      sportsNews  \n",
       "27  HONG KONG (Reuters) - Australia coach Alen Sta...      sportsNews  \n",
       "28  GOLD COAST, Australia (Reuters) - Heavyweight ...      sportsNews  \n",
       "29  GOLD COAST, Australia (Reuters) - Caster Semen...      sportsNews  \n",
       "30  NEW YORK (Reuters) - Mylan N.V.'s  emergency a...      healthNews  \n",
       "31  FRANKFURT/NEW YORK (Reuters) - Generic drug ma...      healthNews  \n",
       "32  WASHINGTON (Reuters) - President Donald Trump ...      healthNews  \n",
       "33  (Reuters) - Kentucky state officials have agre...      healthNews  \n",
       "34  LANGLEY, British Columbia (Reuters) - Layers o...      healthNews  \n",
       "35  (Reuters) - A federal appeals court on Friday ...      healthNews  \n",
       "36  (Reuters) - CVS Health on Friday named Marc-Da...      healthNews  \n",
       "37  (Reuters) - No. 2 U.S. nursing home chain HCR ...      healthNews  \n",
       "38  (Reuters Health) - Although religiosity is gen...      healthNews  \n",
       "39  (Reuters Health) - Doctors who see fewer patie...      healthNews  \n",
       "40  NEW YORK (Reuters) - A U.S. prosecutor on Frid...    politicsNews  \n",
       "41  WASHINGTON/NEW YORK (Reuters) - President Dona...    politicsNews  \n",
       "42  WASHINGTON/TOKYO (Reuters) - U.S. President Do...    politicsNews  \n",
       "43  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews  \n",
       "44  WASHINGTON (Reuters) - A Republican-led House ...    politicsNews  \n",
       "45  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews  \n",
       "46  WASHINGTON (Reuters) - The U.S. Department of ...    politicsNews  \n",
       "47  WASHINGTON (Reuters) - U.S. President Donald T...    politicsNews  \n",
       "48  WASHINGTON (Reuters) - The U.S. special counse...    politicsNews  \n",
       "49  WASHINGTON (Reuters) - The U.S. House of Repre...    politicsNews  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Putting Data Into DataFrame\n",
    "news_df = pd.DataFrame(title_list, columns=[\"Title\"])\n",
    "news_df[\"Description\"] = description_list\n",
    "news_df[\"Category\"] = category_list\n",
    "print(len(news_df))\n",
    "news_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    (Reuters) - Wall Street is hoping that first-q...\n",
       "1    (Reuters) - Tesla Inc  will be profitable in t...\n",
       "2    WASHINGTON/TOKYO (Reuters) - U.S. President Do...\n",
       "3    (Reuters) - Two U.S. regulators have proposed ...\n",
       "4    (Reuters) - Facebook Inc  Chief Executive Mark...\n",
       "Name: Description, dtype: object"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "news_df[\"Description\"].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(Reuters) - Wall Street is hoping that first-quarter earnings growth and corporate forecasts are strong enough to bring the FAANG group of stocks back into favor and take the spotlight off worries that caused the recent sell-off in the high-flying group.<div class=\"feedflare\">\n",
      "<a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:yIl2AUoC8zA\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?d=yIl2AUoC8zA\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:F7zBnMyn0Lo\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=WBFIN23VpFE:YCyjiaDuEFA:F7zBnMyn0Lo\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=WBFIN23VpFE:YCyjiaDuEFA:V_sGLiPBpWU\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=WBFIN23VpFE:YCyjiaDuEFA:V_sGLiPBpWU\" border=\"0\"></img></a>\n",
      "</div><img src=\"http://feeds.feedburner.com/~r/reuters/businessNews/~4/WBFIN23VpFE\" height=\"1\" width=\"1\" alt=\"\"/>\n"
     ]
    }
   ],
   "source": [
    "print(news_df[\"Description\"][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "NEW YORK (Reuters) - Financial stocks led a drop on Wall Street on Friday, as results from big banks failed to enthuse and geopolitical tensions in Syria and Russia further unnerved investors.<div class=\"feedflare\">\n",
       "<a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:yIl2AUoC8zA\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?d=yIl2AUoC8zA\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:F7zBnMyn0Lo\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=G0SWmsbH8M8:33ar9b0m6EQ:F7zBnMyn0Lo\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:V_sGLiPBpWU\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=G0SWmsbH8M8:33ar9b0m6EQ:V_sGLiPBpWU\" border=\"0\"></img></a>\n",
       "</div><img src=\"http://feeds.feedburner.com/~r/reuters/businessNews/~4/G0SWmsbH8M8\" height=\"1\" width=\"1\" alt=\"\"/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%HTML\n",
    "NEW YORK (Reuters) - Financial stocks led a drop on Wall Street on Friday, as results from big banks failed to enthuse and geopolitical tensions in Syria and Russia further unnerved investors.<div class=\"feedflare\">\n",
    "<a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:yIl2AUoC8zA\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?d=yIl2AUoC8zA\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:F7zBnMyn0Lo\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=G0SWmsbH8M8:33ar9b0m6EQ:F7zBnMyn0Lo\" border=\"0\"></img></a> <a href=\"http://feeds.reuters.com/~ff/reuters/businessNews?a=G0SWmsbH8M8:33ar9b0m6EQ:V_sGLiPBpWU\"><img src=\"http://feeds.feedburner.com/~ff/reuters/businessNews?i=G0SWmsbH8M8:33ar9b0m6EQ:V_sGLiPBpWU\" border=\"0\"></img></a>\n",
    "</div><img src=\"http://feeds.feedburner.com/~r/reuters/businessNews/~4/G0SWmsbH8M8\" height=\"1\" width=\"1\" alt=\"\"/>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Extracting text from the description"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Description</th>\n",
       "      <th>Category</th>\n",
       "      <th>Short_description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Wall Street eyes earnings stabilizer after FAA...</td>\n",
       "      <td>(Reuters) - Wall Street is hoping that first-q...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Wall Street is hoping that first-quarter earni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Tesla Inc  will be profitable in the third and...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>U.S. President Donald Trump said the United St...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wells Fargo faces $1 billion fine from loan ab...</td>\n",
       "      <td>(Reuters) - Two U.S. regulators have proposed ...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Two U.S. regulators have proposed Wells Fargo ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Facebook Inc  Chief Executive Mark Zuckerberg'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GE books $4.2 billion charge, restates earning...</td>\n",
       "      <td>NEW YORK (Reuters) - General Electric Co  said...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>General Electric Co  said on Friday it took a ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>U.S. bank executives see delayed boost from ta...</td>\n",
       "      <td>NEW YORK (Reuters) - Banks have not reaped the...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Banks have not reaped the full benefit of U.S....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>U.S. lowers NAFTA key auto content demand: aut...</td>\n",
       "      <td>MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>U.S. trade negotiators have significantly soft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>German interior minister rejects union's six p...</td>\n",
       "      <td>BERLIN (Reuters) - German Interior Minister Ho...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>German Interior Minister Horst Seehofer said o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Bratz maker's CEO bids $890 million for Toys '...</td>\n",
       "      <td>(Reuters) - Bratz doll maker MGA Entertainment...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Bratz doll maker MGA Entertainment said on Fri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Facebook Inc  Chief Executive Mark Zuckerberg'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Tesla Inc  will be profitable in the third and...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>'Gold rush' for Wi-Fi on board planes spurs in...</td>\n",
       "      <td>HAMBURG (Reuters) - Satellite technology to pr...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Satellite technology to provide Wi-Fi on board...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Russia to ban Telegram messenger over encrypti...</td>\n",
       "      <td>MOSCOW (Reuters) - A Russian court on Friday o...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>A Russian court on Friday ordered that access ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Trade war or not, China is closing the gap on ...</td>\n",
       "      <td>HONG KONG (Reuters) - China's rising investmen...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>China's rising investment in research and expa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Singapore watchdog sets interim measures for U...</td>\n",
       "      <td>SINGAPORE (Reuters) - Singapore's competition ...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Singapore's competition watchdog on Friday out...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Singapore to test facial recognition on lamppo...</td>\n",
       "      <td>SINGAPORE (Reuters) - In the not too distant f...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>In the not too distant future, surveillance ca...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Microsoft auditing partner KPMG's anti-piracy ...</td>\n",
       "      <td>NEW DELHI (Reuters) - Microsoft Corp is invest...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Microsoft Corp is investigating the methods pa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Lawmakers question FBI over San Bernardino sui...</td>\n",
       "      <td>(Reuters) - A bipartisan group of lawmakers in...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>A bipartisan group of lawmakers in the U.S. Ho...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Weibo to ban gay, violent content from platform</td>\n",
       "      <td>SHANGHAI (Reuters) - China's Sina Weibo will r...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>China's Sina Weibo will remove gay and violent...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Blake heads into hiding from Bolt as Jamaica c...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Usain Bolt j...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Usain Bolt joked that he had perhaps retired t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Vettel snatches pole position in Ferrari one-two</td>\n",
       "      <td>SHANGHAI (Reuters) - Formula One championship ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Formula One championship leader Sebastian Vett...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Olympics: Phelps says U.S. swim team can thriv...</td>\n",
       "      <td>(Reuters) - Michael Phelps will not be tempted...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Michael Phelps will not be tempted out of reti...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Horse racing: Australian mare Winx eases to re...</td>\n",
       "      <td>(Reuters) - Australian champion mare Winx crui...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Australian champion mare Winx cruised to a rec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Highlights on day 10 of Commonwealth Games</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Highlights o...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Highlights on day 10 of the Commonwealth Games...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Athletics: Kirui and Rupp renew Boston Maratho...</td>\n",
       "      <td>(Reuters) - Kenyans Geoffrey Kirui and Edna Ki...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Kenyans Geoffrey Kirui and Edna Kiplagat could...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Golf: DeChambeau surges to halfway lead at Hil...</td>\n",
       "      <td>(Reuters) - Bryson DeChambeau birdied four of ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Bryson DeChambeau birdied four of his final si...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Australia coach hails fighting spirit after se...</td>\n",
       "      <td>HONG KONG (Reuters) - Australia coach Alen Sta...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Australia coach Alen Stajcic hailed his team's...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>New Zealand's Nyika, women's hockey team crash...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Heavyweight ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Heavyweight boxer David Nyika did more than ju...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Semenya believes she could go the distance aft...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Caster Semen...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Caster Semenya leaves the Commonwealth Games c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>EpiPen shortages seen in Canada, UK but U.S. s...</td>\n",
       "      <td>NEW YORK (Reuters) - Mylan N.V.'s  emergency a...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Mylan N.V.'s  emergency allergy antidote EpiPe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Mylan seeks deal for German Merck's consumer p...</td>\n",
       "      <td>FRANKFURT/NEW YORK (Reuters) - Generic drug ma...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Generic drug maker Mylan NV is in advanced dis...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>President Donald Trump will lift his administr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Enforcement of Kentucky abortion law suspended...</td>\n",
       "      <td>(Reuters) - Kentucky state officials have agre...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Kentucky state officials have agreed to hold o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Insect farms gear up to feed soaring global pr...</td>\n",
       "      <td>LANGLEY, British Columbia (Reuters) - Layers o...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Layers of squirming black soldier fly larvae f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>U.S. appeals court strikes down Maryland drug ...</td>\n",
       "      <td>(Reuters) - A federal appeals court on Friday ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>A federal appeals court on Friday declared unc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>CVS Health appoints Marc-David Munk as CMO of ...</td>\n",
       "      <td>(Reuters) - CVS Health on Friday named Marc-Da...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>CVS Health on Friday named Marc-David Munk as ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>ManorCare wins court approval to exit bankrupt...</td>\n",
       "      <td>(Reuters) - No. 2 U.S. nursing home chain HCR ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>No. 2 U.S. nursing home chain HCR ManorCare In...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Religious faith linked to suicidal behavior in...</td>\n",
       "      <td>(Reuters Health) - Although religiosity is gen...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Although religiosity is generally tied to redu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Patients more satisfied when doctors treat few...</td>\n",
       "      <td>(Reuters Health) - Doctors who see fewer patie...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Doctors who see fewer patients may get better ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Trump's personal lawyer attacked by U.S. prose...</td>\n",
       "      <td>NEW YORK (Reuters) - A U.S. prosecutor on Frid...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>A U.S. prosecutor on Friday attacked a claim b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Trump, called an unethical liar in book, blast...</td>\n",
       "      <td>WASHINGTON/NEW YORK (Reuters) - President Dona...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump attacked James Comey as...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>U.S. President Donald Trump said the United St...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>With Russia on his mind, Trump looks for tough...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump is pressing for a more ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Republican-led House panel orders interviews w...</td>\n",
       "      <td>WASHINGTON (Reuters) - A Republican-led House ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>A Republican-led House of Representatives comm...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump will lift his administr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>Ex-FBI deputy director faulted by Justice Depa...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. Department of ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. Department of Justice's inspector gen...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>Trump lawyer arranged $1.6 million payoff to P...</td>\n",
       "      <td>WASHINGTON (Reuters) - U.S. President Donald T...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>U.S. President Donald Trump's personal lawyer ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>Special counsel has evidence Michael Cohen tra...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. special counse...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. special counsel in the Russia probe h...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>House passes bill to streamline 'Volcker Rule'</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. House of Repre...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. House of Representatives voted on Fri...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                Title  \\\n",
       "0   Wall Street eyes earnings stabilizer after FAA...   \n",
       "1   Musk insists Tesla does not need more capital,...   \n",
       "2   Trump says U.S. will only rejoin Pacific trade...   \n",
       "3   Wells Fargo faces $1 billion fine from loan ab...   \n",
       "4   Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "5   GE books $4.2 billion charge, restates earning...   \n",
       "6   U.S. bank executives see delayed boost from ta...   \n",
       "7   U.S. lowers NAFTA key auto content demand: aut...   \n",
       "8   German interior minister rejects union's six p...   \n",
       "9   Bratz maker's CEO bids $890 million for Toys '...   \n",
       "10  Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "11  Musk insists Tesla does not need more capital,...   \n",
       "12  'Gold rush' for Wi-Fi on board planes spurs in...   \n",
       "13  Russia to ban Telegram messenger over encrypti...   \n",
       "14  Trade war or not, China is closing the gap on ...   \n",
       "15  Singapore watchdog sets interim measures for U...   \n",
       "16  Singapore to test facial recognition on lamppo...   \n",
       "17  Microsoft auditing partner KPMG's anti-piracy ...   \n",
       "18  Lawmakers question FBI over San Bernardino sui...   \n",
       "19    Weibo to ban gay, violent content from platform   \n",
       "20  Blake heads into hiding from Bolt as Jamaica c...   \n",
       "21   Vettel snatches pole position in Ferrari one-two   \n",
       "22  Olympics: Phelps says U.S. swim team can thriv...   \n",
       "23  Horse racing: Australian mare Winx eases to re...   \n",
       "24         Highlights on day 10 of Commonwealth Games   \n",
       "25  Athletics: Kirui and Rupp renew Boston Maratho...   \n",
       "26  Golf: DeChambeau surges to halfway lead at Hil...   \n",
       "27  Australia coach hails fighting spirit after se...   \n",
       "28  New Zealand's Nyika, women's hockey team crash...   \n",
       "29  Semenya believes she could go the distance aft...   \n",
       "30  EpiPen shortages seen in Canada, UK but U.S. s...   \n",
       "31  Mylan seeks deal for German Merck's consumer p...   \n",
       "32  Trump to lift legal threat to states that perm...   \n",
       "33  Enforcement of Kentucky abortion law suspended...   \n",
       "34  Insect farms gear up to feed soaring global pr...   \n",
       "35  U.S. appeals court strikes down Maryland drug ...   \n",
       "36  CVS Health appoints Marc-David Munk as CMO of ...   \n",
       "37  ManorCare wins court approval to exit bankrupt...   \n",
       "38  Religious faith linked to suicidal behavior in...   \n",
       "39  Patients more satisfied when doctors treat few...   \n",
       "40  Trump's personal lawyer attacked by U.S. prose...   \n",
       "41  Trump, called an unethical liar in book, blast...   \n",
       "42  Trump says U.S. will only rejoin Pacific trade...   \n",
       "43  With Russia on his mind, Trump looks for tough...   \n",
       "44  Republican-led House panel orders interviews w...   \n",
       "45  Trump to lift legal threat to states that perm...   \n",
       "46  Ex-FBI deputy director faulted by Justice Depa...   \n",
       "47  Trump lawyer arranged $1.6 million payoff to P...   \n",
       "48  Special counsel has evidence Michael Cohen tra...   \n",
       "49     House passes bill to streamline 'Volcker Rule'   \n",
       "\n",
       "                                          Description        Category  \\\n",
       "0   (Reuters) - Wall Street is hoping that first-q...    businessNews   \n",
       "1   (Reuters) - Tesla Inc  will be profitable in t...    businessNews   \n",
       "2   WASHINGTON/TOKYO (Reuters) - U.S. President Do...    businessNews   \n",
       "3   (Reuters) - Two U.S. regulators have proposed ...    businessNews   \n",
       "4   (Reuters) - Facebook Inc  Chief Executive Mark...    businessNews   \n",
       "5   NEW YORK (Reuters) - General Electric Co  said...    businessNews   \n",
       "6   NEW YORK (Reuters) - Banks have not reaped the...    businessNews   \n",
       "7   MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...    businessNews   \n",
       "8   BERLIN (Reuters) - German Interior Minister Ho...    businessNews   \n",
       "9   (Reuters) - Bratz doll maker MGA Entertainment...    businessNews   \n",
       "10  (Reuters) - Facebook Inc  Chief Executive Mark...  technologyNews   \n",
       "11  (Reuters) - Tesla Inc  will be profitable in t...  technologyNews   \n",
       "12  HAMBURG (Reuters) - Satellite technology to pr...  technologyNews   \n",
       "13  MOSCOW (Reuters) - A Russian court on Friday o...  technologyNews   \n",
       "14  HONG KONG (Reuters) - China's rising investmen...  technologyNews   \n",
       "15  SINGAPORE (Reuters) - Singapore's competition ...  technologyNews   \n",
       "16  SINGAPORE (Reuters) - In the not too distant f...  technologyNews   \n",
       "17  NEW DELHI (Reuters) - Microsoft Corp is invest...  technologyNews   \n",
       "18  (Reuters) - A bipartisan group of lawmakers in...  technologyNews   \n",
       "19  SHANGHAI (Reuters) - China's Sina Weibo will r...  technologyNews   \n",
       "20  GOLD COAST, Australia (Reuters) - Usain Bolt j...      sportsNews   \n",
       "21  SHANGHAI (Reuters) - Formula One championship ...      sportsNews   \n",
       "22  (Reuters) - Michael Phelps will not be tempted...      sportsNews   \n",
       "23  (Reuters) - Australian champion mare Winx crui...      sportsNews   \n",
       "24  GOLD COAST, Australia (Reuters) - Highlights o...      sportsNews   \n",
       "25  (Reuters) - Kenyans Geoffrey Kirui and Edna Ki...      sportsNews   \n",
       "26  (Reuters) - Bryson DeChambeau birdied four of ...      sportsNews   \n",
       "27  HONG KONG (Reuters) - Australia coach Alen Sta...      sportsNews   \n",
       "28  GOLD COAST, Australia (Reuters) - Heavyweight ...      sportsNews   \n",
       "29  GOLD COAST, Australia (Reuters) - Caster Semen...      sportsNews   \n",
       "30  NEW YORK (Reuters) - Mylan N.V.'s  emergency a...      healthNews   \n",
       "31  FRANKFURT/NEW YORK (Reuters) - Generic drug ma...      healthNews   \n",
       "32  WASHINGTON (Reuters) - President Donald Trump ...      healthNews   \n",
       "33  (Reuters) - Kentucky state officials have agre...      healthNews   \n",
       "34  LANGLEY, British Columbia (Reuters) - Layers o...      healthNews   \n",
       "35  (Reuters) - A federal appeals court on Friday ...      healthNews   \n",
       "36  (Reuters) - CVS Health on Friday named Marc-Da...      healthNews   \n",
       "37  (Reuters) - No. 2 U.S. nursing home chain HCR ...      healthNews   \n",
       "38  (Reuters Health) - Although religiosity is gen...      healthNews   \n",
       "39  (Reuters Health) - Doctors who see fewer patie...      healthNews   \n",
       "40  NEW YORK (Reuters) - A U.S. prosecutor on Frid...    politicsNews   \n",
       "41  WASHINGTON/NEW YORK (Reuters) - President Dona...    politicsNews   \n",
       "42  WASHINGTON/TOKYO (Reuters) - U.S. President Do...    politicsNews   \n",
       "43  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews   \n",
       "44  WASHINGTON (Reuters) - A Republican-led House ...    politicsNews   \n",
       "45  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews   \n",
       "46  WASHINGTON (Reuters) - The U.S. Department of ...    politicsNews   \n",
       "47  WASHINGTON (Reuters) - U.S. President Donald T...    politicsNews   \n",
       "48  WASHINGTON (Reuters) - The U.S. special counse...    politicsNews   \n",
       "49  WASHINGTON (Reuters) - The U.S. House of Repre...    politicsNews   \n",
       "\n",
       "                                    Short_description  \n",
       "0   Wall Street is hoping that first-quarter earni...  \n",
       "1   Tesla Inc  will be profitable in the third and...  \n",
       "2   U.S. President Donald Trump said the United St...  \n",
       "3   Two U.S. regulators have proposed Wells Fargo ...  \n",
       "4   Facebook Inc  Chief Executive Mark Zuckerberg'...  \n",
       "5   General Electric Co  said on Friday it took a ...  \n",
       "6   Banks have not reaped the full benefit of U.S....  \n",
       "7   U.S. trade negotiators have significantly soft...  \n",
       "8   German Interior Minister Horst Seehofer said o...  \n",
       "9   Bratz doll maker MGA Entertainment said on Fri...  \n",
       "10  Facebook Inc  Chief Executive Mark Zuckerberg'...  \n",
       "11  Tesla Inc  will be profitable in the third and...  \n",
       "12  Satellite technology to provide Wi-Fi on board...  \n",
       "13  A Russian court on Friday ordered that access ...  \n",
       "14  China's rising investment in research and expa...  \n",
       "15  Singapore's competition watchdog on Friday out...  \n",
       "16  In the not too distant future, surveillance ca...  \n",
       "17  Microsoft Corp is investigating the methods pa...  \n",
       "18  A bipartisan group of lawmakers in the U.S. Ho...  \n",
       "19  China's Sina Weibo will remove gay and violent...  \n",
       "20  Usain Bolt joked that he had perhaps retired t...  \n",
       "21  Formula One championship leader Sebastian Vett...  \n",
       "22  Michael Phelps will not be tempted out of reti...  \n",
       "23  Australian champion mare Winx cruised to a rec...  \n",
       "24  Highlights on day 10 of the Commonwealth Games...  \n",
       "25  Kenyans Geoffrey Kirui and Edna Kiplagat could...  \n",
       "26  Bryson DeChambeau birdied four of his final si...  \n",
       "27  Australia coach Alen Stajcic hailed his team's...  \n",
       "28  Heavyweight boxer David Nyika did more than ju...  \n",
       "29  Caster Semenya leaves the Commonwealth Games c...  \n",
       "30  Mylan N.V.'s  emergency allergy antidote EpiPe...  \n",
       "31  Generic drug maker Mylan NV is in advanced dis...  \n",
       "32  President Donald Trump will lift his administr...  \n",
       "33  Kentucky state officials have agreed to hold o...  \n",
       "34  Layers of squirming black soldier fly larvae f...  \n",
       "35  A federal appeals court on Friday declared unc...  \n",
       "36  CVS Health on Friday named Marc-David Munk as ...  \n",
       "37  No. 2 U.S. nursing home chain HCR ManorCare In...  \n",
       "38  Although religiosity is generally tied to redu...  \n",
       "39  Doctors who see fewer patients may get better ...  \n",
       "40  A U.S. prosecutor on Friday attacked a claim b...  \n",
       "41  President Donald Trump attacked James Comey as...  \n",
       "42  U.S. President Donald Trump said the United St...  \n",
       "43  President Donald Trump is pressing for a more ...  \n",
       "44  A Republican-led House of Representatives comm...  \n",
       "45  President Donald Trump will lift his administr...  \n",
       "46  The U.S. Department of Justice's inspector gen...  \n",
       "47  U.S. President Donald Trump's personal lawyer ...  \n",
       "48  The U.S. special counsel in the Russia probe h...  \n",
       "49  The U.S. House of Representatives voted on Fri...  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a short description: keep the text after the first ' - ' separator\n",
    "# and before the first '<' (presumably where the feed's HTML markup starts).\n",
    "def _short_description(item):\n",
    "    \"\"\"Return the part of `item` after the first ' - ' and before the first '<'.\n",
    "\n",
    "    The previous slice used str.find(), which returns -1 when a marker is\n",
    "    missing: a missing ' - ' made the slice start at index 2 and a missing\n",
    "    '<' dropped the final character. partition()/split() leave the text\n",
    "    intact when a marker is absent.\n",
    "    \"\"\"\n",
    "    _, sep, rest = item.partition(\" - \")\n",
    "    text = rest if sep else item  # no separator found: keep the whole string\n",
    "    return text.split(\"<\", 1)[0]  # no '<' found: nothing to trim\n",
    "\n",
    "\n",
    "news_df[\"Short_description\"] = [_short_description(item) for item in news_df[\"Description\"]]\n",
    "news_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Description</th>\n",
       "      <th>Category</th>\n",
       "      <th>Short_description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Wall Street eyes earnings stabilizer after FAA...</td>\n",
       "      <td>(Reuters) - Wall Street is hoping that first-q...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Wall Street is hoping that first-quarter earni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Tesla Inc  will be profitable in the third and...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>U.S. President Donald Trump said the United St...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wells Fargo faces $1 billion fine from loan ab...</td>\n",
       "      <td>(Reuters) - Two U.S. regulators have proposed ...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Two U.S. regulators have proposed Wells Fargo ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Facebook Inc  Chief Executive Mark Zuckerberg'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GE books $4.2 billion charge, restates earning...</td>\n",
       "      <td>NEW YORK (Reuters) - General Electric Co  said...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>General Electric Co  said on Friday it took a ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>U.S. bank executives see delayed boost from ta...</td>\n",
       "      <td>NEW YORK (Reuters) - Banks have not reaped the...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Banks have not reaped the full benefit of U.S....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>U.S. lowers NAFTA key auto content demand: aut...</td>\n",
       "      <td>MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>U.S. trade negotiators have significantly soft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>German interior minister rejects union's six p...</td>\n",
       "      <td>BERLIN (Reuters) - German Interior Minister Ho...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>German Interior Minister Horst Seehofer said o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Bratz maker's CEO bids $890 million for Toys '...</td>\n",
       "      <td>(Reuters) - Bratz doll maker MGA Entertainment...</td>\n",
       "      <td>businessNews</td>\n",
       "      <td>Bratz doll maker MGA Entertainment said on Fri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Facebook CEO's compensation jumps to $8.9 mill...</td>\n",
       "      <td>(Reuters) - Facebook Inc  Chief Executive Mark...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Facebook Inc  Chief Executive Mark Zuckerberg'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Musk insists Tesla does not need more capital,...</td>\n",
       "      <td>(Reuters) - Tesla Inc  will be profitable in t...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Tesla Inc  will be profitable in the third and...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>'Gold rush' for Wi-Fi on board planes spurs in...</td>\n",
       "      <td>HAMBURG (Reuters) - Satellite technology to pr...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Satellite technology to provide Wi-Fi on board...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Russia to ban Telegram messenger over encrypti...</td>\n",
       "      <td>MOSCOW (Reuters) - A Russian court on Friday o...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>A Russian court on Friday ordered that access ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Trade war or not, China is closing the gap on ...</td>\n",
       "      <td>HONG KONG (Reuters) - China's rising investmen...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>China's rising investment in research and expa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Singapore watchdog sets interim measures for U...</td>\n",
       "      <td>SINGAPORE (Reuters) - Singapore's competition ...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Singapore's competition watchdog on Friday out...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Singapore to test facial recognition on lamppo...</td>\n",
       "      <td>SINGAPORE (Reuters) - In the not too distant f...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>In the not too distant future, surveillance ca...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Microsoft auditing partner KPMG's anti-piracy ...</td>\n",
       "      <td>NEW DELHI (Reuters) - Microsoft Corp is invest...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>Microsoft Corp is investigating the methods pa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Lawmakers question FBI over San Bernardino sui...</td>\n",
       "      <td>(Reuters) - A bipartisan group of lawmakers in...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>A bipartisan group of lawmakers in the U.S. Ho...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Weibo to ban gay, violent content from platform</td>\n",
       "      <td>SHANGHAI (Reuters) - China's Sina Weibo will r...</td>\n",
       "      <td>technologyNews</td>\n",
       "      <td>China's Sina Weibo will remove gay and violent...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Blake heads into hiding from Bolt as Jamaica c...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Usain Bolt j...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Usain Bolt joked that he had perhaps retired t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Vettel snatches pole position in Ferrari one-two</td>\n",
       "      <td>SHANGHAI (Reuters) - Formula One championship ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Formula One championship leader Sebastian Vett...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Olympics: Phelps says U.S. swim team can thriv...</td>\n",
       "      <td>(Reuters) - Michael Phelps will not be tempted...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Michael Phelps will not be tempted out of reti...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Horse racing: Australian mare Winx eases to re...</td>\n",
       "      <td>(Reuters) - Australian champion mare Winx crui...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Australian champion mare Winx cruised to a rec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Highlights on day 10 of Commonwealth Games</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Highlights o...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Highlights on day 10 of the Commonwealth Games...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Athletics: Kirui and Rupp renew Boston Maratho...</td>\n",
       "      <td>(Reuters) - Kenyans Geoffrey Kirui and Edna Ki...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Kenyans Geoffrey Kirui and Edna Kiplagat could...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Golf: DeChambeau surges to halfway lead at Hil...</td>\n",
       "      <td>(Reuters) - Bryson DeChambeau birdied four of ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Bryson DeChambeau birdied four of his final si...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Australia coach hails fighting spirit after se...</td>\n",
       "      <td>HONG KONG (Reuters) - Australia coach Alen Sta...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Australia coach Alen Stajcic hailed his team's...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>New Zealand's Nyika, women's hockey team crash...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Heavyweight ...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Heavyweight boxer David Nyika did more than ju...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Semenya believes she could go the distance aft...</td>\n",
       "      <td>GOLD COAST, Australia (Reuters) - Caster Semen...</td>\n",
       "      <td>sportsNews</td>\n",
       "      <td>Caster Semenya leaves the Commonwealth Games c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>EpiPen shortages seen in Canada, UK but U.S. s...</td>\n",
       "      <td>NEW YORK (Reuters) - Mylan N.V.'s  emergency a...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Mylan N.V.'s  emergency allergy antidote EpiPe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Mylan seeks deal for German Merck's consumer p...</td>\n",
       "      <td>FRANKFURT/NEW YORK (Reuters) - Generic drug ma...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Generic drug maker Mylan NV is in advanced dis...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>President Donald Trump will lift his administr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Enforcement of Kentucky abortion law suspended...</td>\n",
       "      <td>(Reuters) - Kentucky state officials have agre...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Kentucky state officials have agreed to hold o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Insect farms gear up to feed soaring global pr...</td>\n",
       "      <td>LANGLEY, British Columbia (Reuters) - Layers o...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Layers of squirming black soldier fly larvae f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>U.S. appeals court strikes down Maryland drug ...</td>\n",
       "      <td>(Reuters) - A federal appeals court on Friday ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>A federal appeals court on Friday declared unc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>CVS Health appoints Marc-David Munk as CMO of ...</td>\n",
       "      <td>(Reuters) - CVS Health on Friday named Marc-Da...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>CVS Health on Friday named Marc-David Munk as ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>ManorCare wins court approval to exit bankrupt...</td>\n",
       "      <td>(Reuters) - No. 2 U.S. nursing home chain HCR ...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>No. 2 U.S. nursing home chain HCR ManorCare In...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Religious faith linked to suicidal behavior in...</td>\n",
       "      <td>(Reuters Health) - Although religiosity is gen...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Although religiosity is generally tied to redu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Patients more satisfied when doctors treat few...</td>\n",
       "      <td>(Reuters Health) - Doctors who see fewer patie...</td>\n",
       "      <td>healthNews</td>\n",
       "      <td>Doctors who see fewer patients may get better ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Trump's personal lawyer attacked by U.S. prose...</td>\n",
       "      <td>NEW YORK (Reuters) - A U.S. prosecutor on Frid...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>A U.S. prosecutor on Friday attacked a claim b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Trump, called an unethical liar in book, blast...</td>\n",
       "      <td>WASHINGTON/NEW YORK (Reuters) - President Dona...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump attacked James Comey as...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>Trump says U.S. will only rejoin Pacific trade...</td>\n",
       "      <td>WASHINGTON/TOKYO (Reuters) - U.S. President Do...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>U.S. President Donald Trump said the United St...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>With Russia on his mind, Trump looks for tough...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump is pressing for a more ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Republican-led House panel orders interviews w...</td>\n",
       "      <td>WASHINGTON (Reuters) - A Republican-led House ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>A Republican-led House of Representatives comm...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>Trump to lift legal threat to states that perm...</td>\n",
       "      <td>WASHINGTON (Reuters) - President Donald Trump ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>President Donald Trump will lift his administr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>Ex-FBI deputy director faulted by Justice Depa...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. Department of ...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. Department of Justice's inspector gen...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>Trump lawyer arranged $1.6 million payoff to P...</td>\n",
       "      <td>WASHINGTON (Reuters) - U.S. President Donald T...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>U.S. President Donald Trump's personal lawyer ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>Special counsel has evidence Michael Cohen tra...</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. special counse...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. special counsel in the Russia probe h...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>House passes bill to streamline 'Volcker Rule'</td>\n",
       "      <td>WASHINGTON (Reuters) - The U.S. House of Repre...</td>\n",
       "      <td>politicsNews</td>\n",
       "      <td>The U.S. House of Representatives voted on Fri...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                Title  \\\n",
       "0   Wall Street eyes earnings stabilizer after FAA...   \n",
       "1   Musk insists Tesla does not need more capital,...   \n",
       "2   Trump says U.S. will only rejoin Pacific trade...   \n",
       "3   Wells Fargo faces $1 billion fine from loan ab...   \n",
       "4   Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "5   GE books $4.2 billion charge, restates earning...   \n",
       "6   U.S. bank executives see delayed boost from ta...   \n",
       "7   U.S. lowers NAFTA key auto content demand: aut...   \n",
       "8   German interior minister rejects union's six p...   \n",
       "9   Bratz maker's CEO bids $890 million for Toys '...   \n",
       "10  Facebook CEO's compensation jumps to $8.9 mill...   \n",
       "11  Musk insists Tesla does not need more capital,...   \n",
       "12  'Gold rush' for Wi-Fi on board planes spurs in...   \n",
       "13  Russia to ban Telegram messenger over encrypti...   \n",
       "14  Trade war or not, China is closing the gap on ...   \n",
       "15  Singapore watchdog sets interim measures for U...   \n",
       "16  Singapore to test facial recognition on lamppo...   \n",
       "17  Microsoft auditing partner KPMG's anti-piracy ...   \n",
       "18  Lawmakers question FBI over San Bernardino sui...   \n",
       "19    Weibo to ban gay, violent content from platform   \n",
       "20  Blake heads into hiding from Bolt as Jamaica c...   \n",
       "21   Vettel snatches pole position in Ferrari one-two   \n",
       "22  Olympics: Phelps says U.S. swim team can thriv...   \n",
       "23  Horse racing: Australian mare Winx eases to re...   \n",
       "24         Highlights on day 10 of Commonwealth Games   \n",
       "25  Athletics: Kirui and Rupp renew Boston Maratho...   \n",
       "26  Golf: DeChambeau surges to halfway lead at Hil...   \n",
       "27  Australia coach hails fighting spirit after se...   \n",
       "28  New Zealand's Nyika, women's hockey team crash...   \n",
       "29  Semenya believes she could go the distance aft...   \n",
       "30  EpiPen shortages seen in Canada, UK but U.S. s...   \n",
       "31  Mylan seeks deal for German Merck's consumer p...   \n",
       "32  Trump to lift legal threat to states that perm...   \n",
       "33  Enforcement of Kentucky abortion law suspended...   \n",
       "34  Insect farms gear up to feed soaring global pr...   \n",
       "35  U.S. appeals court strikes down Maryland drug ...   \n",
       "36  CVS Health appoints Marc-David Munk as CMO of ...   \n",
       "37  ManorCare wins court approval to exit bankrupt...   \n",
       "38  Religious faith linked to suicidal behavior in...   \n",
       "39  Patients more satisfied when doctors treat few...   \n",
       "40  Trump's personal lawyer attacked by U.S. prose...   \n",
       "41  Trump, called an unethical liar in book, blast...   \n",
       "42  Trump says U.S. will only rejoin Pacific trade...   \n",
       "43  With Russia on his mind, Trump looks for tough...   \n",
       "44  Republican-led House panel orders interviews w...   \n",
       "45  Trump to lift legal threat to states that perm...   \n",
       "46  Ex-FBI deputy director faulted by Justice Depa...   \n",
       "47  Trump lawyer arranged $1.6 million payoff to P...   \n",
       "48  Special counsel has evidence Michael Cohen tra...   \n",
       "49     House passes bill to streamline 'Volcker Rule'   \n",
       "\n",
       "                                          Description        Category  \\\n",
       "0   (Reuters) - Wall Street is hoping that first-q...    businessNews   \n",
       "1   (Reuters) - Tesla Inc  will be profitable in t...    businessNews   \n",
       "2   WASHINGTON/TOKYO (Reuters) - U.S. President Do...    businessNews   \n",
       "3   (Reuters) - Two U.S. regulators have proposed ...    businessNews   \n",
       "4   (Reuters) - Facebook Inc  Chief Executive Mark...    businessNews   \n",
       "5   NEW YORK (Reuters) - General Electric Co  said...    businessNews   \n",
       "6   NEW YORK (Reuters) - Banks have not reaped the...    businessNews   \n",
       "7   MEXICO CITY/WASHINGTON (Reuters) - U.S. trade ...    businessNews   \n",
       "8   BERLIN (Reuters) - German Interior Minister Ho...    businessNews   \n",
       "9   (Reuters) - Bratz doll maker MGA Entertainment...    businessNews   \n",
       "10  (Reuters) - Facebook Inc  Chief Executive Mark...  technologyNews   \n",
       "11  (Reuters) - Tesla Inc  will be profitable in t...  technologyNews   \n",
       "12  HAMBURG (Reuters) - Satellite technology to pr...  technologyNews   \n",
       "13  MOSCOW (Reuters) - A Russian court on Friday o...  technologyNews   \n",
       "14  HONG KONG (Reuters) - China's rising investmen...  technologyNews   \n",
       "15  SINGAPORE (Reuters) - Singapore's competition ...  technologyNews   \n",
       "16  SINGAPORE (Reuters) - In the not too distant f...  technologyNews   \n",
       "17  NEW DELHI (Reuters) - Microsoft Corp is invest...  technologyNews   \n",
       "18  (Reuters) - A bipartisan group of lawmakers in...  technologyNews   \n",
       "19  SHANGHAI (Reuters) - China's Sina Weibo will r...  technologyNews   \n",
       "20  GOLD COAST, Australia (Reuters) - Usain Bolt j...      sportsNews   \n",
       "21  SHANGHAI (Reuters) - Formula One championship ...      sportsNews   \n",
       "22  (Reuters) - Michael Phelps will not be tempted...      sportsNews   \n",
       "23  (Reuters) - Australian champion mare Winx crui...      sportsNews   \n",
       "24  GOLD COAST, Australia (Reuters) - Highlights o...      sportsNews   \n",
       "25  (Reuters) - Kenyans Geoffrey Kirui and Edna Ki...      sportsNews   \n",
       "26  (Reuters) - Bryson DeChambeau birdied four of ...      sportsNews   \n",
       "27  HONG KONG (Reuters) - Australia coach Alen Sta...      sportsNews   \n",
       "28  GOLD COAST, Australia (Reuters) - Heavyweight ...      sportsNews   \n",
       "29  GOLD COAST, Australia (Reuters) - Caster Semen...      sportsNews   \n",
       "30  NEW YORK (Reuters) - Mylan N.V.'s  emergency a...      healthNews   \n",
       "31  FRANKFURT/NEW YORK (Reuters) - Generic drug ma...      healthNews   \n",
       "32  WASHINGTON (Reuters) - President Donald Trump ...      healthNews   \n",
       "33  (Reuters) - Kentucky state officials have agre...      healthNews   \n",
       "34  LANGLEY, British Columbia (Reuters) - Layers o...      healthNews   \n",
       "35  (Reuters) - A federal appeals court on Friday ...      healthNews   \n",
       "36  (Reuters) - CVS Health on Friday named Marc-Da...      healthNews   \n",
       "37  (Reuters) - No. 2 U.S. nursing home chain HCR ...      healthNews   \n",
       "38  (Reuters Health) - Although religiosity is gen...      healthNews   \n",
       "39  (Reuters Health) - Doctors who see fewer patie...      healthNews   \n",
       "40  NEW YORK (Reuters) - A U.S. prosecutor on Frid...    politicsNews   \n",
       "41  WASHINGTON/NEW YORK (Reuters) - President Dona...    politicsNews   \n",
       "42  WASHINGTON/TOKYO (Reuters) - U.S. President Do...    politicsNews   \n",
       "43  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews   \n",
       "44  WASHINGTON (Reuters) - A Republican-led House ...    politicsNews   \n",
       "45  WASHINGTON (Reuters) - President Donald Trump ...    politicsNews   \n",
       "46  WASHINGTON (Reuters) - The U.S. Department of ...    politicsNews   \n",
       "47  WASHINGTON (Reuters) - U.S. President Donald T...    politicsNews   \n",
       "48  WASHINGTON (Reuters) - The U.S. special counse...    politicsNews   \n",
       "49  WASHINGTON (Reuters) - The U.S. House of Repre...    politicsNews   \n",
       "\n",
       "                                    Short_description  \n",
       "0   Wall Street is hoping that first-quarter earni...  \n",
       "1   Tesla Inc  will be profitable in the third and...  \n",
       "2   U.S. President Donald Trump said the United St...  \n",
       "3   Two U.S. regulators have proposed Wells Fargo ...  \n",
       "4   Facebook Inc  Chief Executive Mark Zuckerberg'...  \n",
       "5   General Electric Co  said on Friday it took a ...  \n",
       "6   Banks have not reaped the full benefit of U.S....  \n",
       "7   U.S. trade negotiators have significantly soft...  \n",
       "8   German Interior Minister Horst Seehofer said o...  \n",
       "9   Bratz doll maker MGA Entertainment said on Fri...  \n",
       "10  Facebook Inc  Chief Executive Mark Zuckerberg'...  \n",
       "11  Tesla Inc  will be profitable in the third and...  \n",
       "12  Satellite technology to provide Wi-Fi on board...  \n",
       "13  A Russian court on Friday ordered that access ...  \n",
       "14  China's rising investment in research and expa...  \n",
       "15  Singapore's competition watchdog on Friday out...  \n",
       "16  In the not too distant future, surveillance ca...  \n",
       "17  Microsoft Corp is investigating the methods pa...  \n",
       "18  A bipartisan group of lawmakers in the U.S. Ho...  \n",
       "19  China's Sina Weibo will remove gay and violent...  \n",
       "20  Usain Bolt joked that he had perhaps retired t...  \n",
       "21  Formula One championship leader Sebastian Vett...  \n",
       "22  Michael Phelps will not be tempted out of reti...  \n",
       "23  Australian champion mare Winx cruised to a rec...  \n",
       "24  Highlights on day 10 of the Commonwealth Games...  \n",
       "25  Kenyans Geoffrey Kirui and Edna Kiplagat could...  \n",
       "26  Bryson DeChambeau birdied four of his final si...  \n",
       "27  Australia coach Alen Stajcic hailed his team's...  \n",
       "28  Heavyweight boxer David Nyika did more than ju...  \n",
       "29  Caster Semenya leaves the Commonwealth Games c...  \n",
       "30  Mylan N.V.'s  emergency allergy antidote EpiPe...  \n",
       "31  Generic drug maker Mylan NV is in advanced dis...  \n",
       "32  President Donald Trump will lift his administr...  \n",
       "33  Kentucky state officials have agreed to hold o...  \n",
       "34  Layers of squirming black soldier fly larvae f...  \n",
       "35  A federal appeals court on Friday declared unc...  \n",
       "36  CVS Health on Friday named Marc-David Munk as ...  \n",
       "37  No. 2 U.S. nursing home chain HCR ManorCare In...  \n",
       "38  Although religiosity is generally tied to redu...  \n",
       "39  Doctors who see fewer patients may get better ...  \n",
       "40  A U.S. prosecutor on Friday attacked a claim b...  \n",
       "41  President Donald Trump attacked James Comey as...  \n",
       "42  U.S. President Donald Trump said the United St...  \n",
       "43  President Donald Trump is pressing for a more ...  \n",
       "44  A Republican-led House of Representatives comm...  \n",
       "45  President Donald Trump will lift his administr...  \n",
       "46  The U.S. Department of Justice's inspector gen...  \n",
       "47  U.S. President Donald Trump's personal lawyer ...  \n",
       "48  The U.S. special counsel in the Russia probe h...  \n",
       "49  The U.S. House of Representatives voted on Fri...  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save to A CSV File\n",
    "news_df.to_csv(\"ReutersNewsDataFinal2.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Feature Extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/rooot/.local/lib/python3.5/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.cross_validation import train_test_split\n",
    "from sklearn.naive_bayes import MultinomialNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus = news_df[\"Short_description\"]\n",
    "vectorizer = CountVectorizer(min_df=1)\n",
    "X = vectorizer.fit_transform(corpus).toarray()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(50, 846)\n"
     ]
    }
   ],
   "source": [
    "# Shape of Our Data\n",
    "print(X.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Features\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['000',\n",
       " '10',\n",
       " '100',\n",
       " '11',\n",
       " '11th',\n",
       " '2016',\n",
       " '2017',\n",
       " '2020',\n",
       " '24',\n",
       " '25th',\n",
       " '30',\n",
       " '33',\n",
       " '4x100m',\n",
       " '53',\n",
       " '64',\n",
       " '65',\n",
       " '890',\n",
       " 'abortion',\n",
       " 'about',\n",
       " 'absence',\n",
       " 'abuses',\n",
       " 'access',\n",
       " 'according',\n",
       " 'accounting',\n",
       " 'achieved']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Names of Vectorized Features\n",
    "vectorizer.get_feature_names()[:25]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'businessNews': 0,\n",
       " 'healthNews': 3,\n",
       " 'politicsNews': 4,\n",
       " 'sportsNews': 2,\n",
       " 'technologyNews': 1}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Building a Map of Categories =Making Categories Numerical since ML understands numbers better\n",
    "categories = news_df[\"Category\"].unique()\n",
    "category_dict = {value:index for index, value in enumerate(categories)}\n",
    "results = news_df[\"Category\"].map(category_dict)\n",
    "category_dict\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "corpus size: 846\n"
     ]
    }
   ],
   "source": [
    "print(\"corpus size: %s\" % len(vectorizer.get_feature_names()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     0\n",
       "1     0\n",
       "2     0\n",
       "3     0\n",
       "4     0\n",
       "5     0\n",
       "6     0\n",
       "7     0\n",
       "8     0\n",
       "9     0\n",
       "10    1\n",
       "11    1\n",
       "12    1\n",
       "13    1\n",
       "14    1\n",
       "15    1\n",
       "16    1\n",
       "17    1\n",
       "18    1\n",
       "19    1\n",
       "20    2\n",
       "21    2\n",
       "22    2\n",
       "23    2\n",
       "24    2\n",
       "25    2\n",
       "26    2\n",
       "27    2\n",
       "28    2\n",
       "29    2\n",
       "30    3\n",
       "31    3\n",
       "32    3\n",
       "33    3\n",
       "34    3\n",
       "35    3\n",
       "36    3\n",
       "37    3\n",
       "38    3\n",
       "39    3\n",
       "40    4\n",
       "41    4\n",
       "42    4\n",
       "43    4\n",
       "44    4\n",
       "45    4\n",
       "46    4\n",
       "47    4\n",
       "48    4\n",
       "49    4\n",
       "Name: Category, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Labels\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split Dataset into Test and Training Data\n",
    "x_train,x_test, y_train,y_test = train_test_split(X, results, test_size=0.2, random_state=1, )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using NaiveBaiyes Multinomial Classifier\n",
    "clf = MultinomialNB()\n",
    "clf.fit(x_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy of our model score:  0.5\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy of our model score: \",clf.score(x_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 3, 4, 1, 4, 0, 4, 2, 1, 0])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.predict(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'businessNews': 0,\n",
       " 'healthNews': 3,\n",
       " 'politicsNews': 4,\n",
       " 'sportsNews': 2,\n",
       " 'technologyNews': 1}"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "category_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Sample Prediction of Category of News"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = [\"Russian Hackers hijack US Election \"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vectorize and Transform text\n",
    "vec_text = vectorizer.transform(text).toarray()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict\n",
    "clf.predict(vec_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#category_dict.keys()[category_dict.values().index(clf.predict(vec_text)[0])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A function to do it\n",
    "def newscategorifier(a):\n",
    "    test_name1 = [a]\n",
    "    transform_vect =vectorizer.transform(text).toarray()\n",
    "    if clf.predict(transform_vect) == 0:\n",
    "        print(\"Business News\")\n",
    "    elif clf.predict(transform_vect) == 1:\n",
    "        print(\"Technology News\")\n",
    "    elif clf.predict(transform_vect) == 2:\n",
    "        print(\"Sport News\")\n",
    "    elif clf.predict(transform_vect) == 3:\n",
    "        print(\"Health News\")\n",
    "    else:\n",
    "        print(\"Politcs News\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Technology News\n"
     ]
    }
   ],
   "source": [
    "newscategorifier(\"Python and Julia for Computer Scientist\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Save Our Model to be used\n",
    "from sklearn.externals import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the destination file in binary write mode; the handle is passed to\n",
    "# joblib.dump in the next cell\n",
    "NaiveBayModel = open(\"newsclassifierNBmodel.pkl\",\"wb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "joblib.dump(clf,NaiveBayModel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Thanks For Watching\n",
    "# J-Secur1ty by Jesse\n",
    "# Jesus Saves @ JCharisTech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
